

# Load the merged recalls data and make sure the licence plate is a plain string
load("Recalls_merged_2019.RData")
Data$kenteken <- as.character(Data$kenteken)

# Drop every licence plate that occurs more than once (about 2000 cars).
# Note that ALL rows of a repeated plate are removed, not only the later copies.
Data$dupl <- duplicated(Data$kenteken)
kentekendupl <- Data[Data$dupl, "kenteken", drop = FALSE]
Data <- Data[!(Data$kenteken %in% kentekendupl$kenteken), ]
Data$dupl <- NULL


# Base panel: one row for every (vehicle, month) combination, with months
# running from November 2017 through March 2019 (first day of each month)
Panel <- setNames(
  expand.grid(
    Data$kenteken,
    seq(as.Date("2017/11/1"), as.Date("2019/03/01"), "months")
  ),
  c("kenteken", "date")
)



# Attach type approval, variant, version and the two first-registration dates
# to every panel row; both dates are snapped to the first day of their month
Temp <- Data[, c("kenteken", "typegoedkeuringsnummer", "variant", "uitvoering",
                 "datumeersteafgiftenederland", "datumeerstetoelating")]
day(Temp$datumeersteafgiftenederland) <- 1
day(Temp$datumeerstetoelating) <- 1
Panel <- merge(Panel, Temp, by = "kenteken", all.x = TRUE)
rm(Temp)
# Original author's note: new cars were already removed in step 1d

# Sort by vehicle, then by month
Panel <- Panel[with(Panel, order(kenteken, date)), ]

# Remove panel months that fall after the car was exported abroad.
# exported_last is shifted back one month so that it marks the month in which
# the change took place (not the month after), then snapped to day 1.
Temp <- Data[, c("kenteken", "exported_last")]
Temp$exported_last <- as.Date(Temp$exported_last, format = "%Y-%m-%d") %m-% months(1)
day(Temp$exported_last) <- 1
Panel <- merge(Panel, Temp, by = "kenteken", all.x = TRUE, all.y = FALSE)
# Keep never-exported cars (NA) and months up to and including the export month
Panel <- Panel[is.na(Panel$exported_last) | Panel$date <= Panel$exported_last, ]

#TODO: dummy for export date (not implemented yet)


# Resale dummy: 1 when the vehicle changed owner in the panel month.
# datumtenaamstelling_all holds the registration dates as dd/mm/yyyy text, so
# the panel month is searched for as a zero-padded "mm/yyyy" key.
# The padding matters: a bare month number matches two months, e.g. "1/2018"
# is a substring of both "01/2018" and "11/2018", which used to flag January
# (and February) rows for resales that happened in November (and December).
Temp <- subset(Data, select = c("kenteken", "datumtenaamstelling_all"))
Panel <- merge(Panel, Temp, by = "kenteken", all.x = TRUE, all.y = FALSE)
Panel$resale <- 0
i <- str_detect(Panel$datumtenaamstelling_all, format(Panel$date, "%m/%Y"))
# Rows with a missing date string give NA in i and keep resale = 0
Panel$resale[i] <- 1





# Bring in the recall reference code, the recall date and the recall-fix date;
# both dates are snapped to the first day of their month so that they can be
# compared with the panel month
Temp <- Data[, c("kenteken", "referentiecoderdw", "recall_new", "recall_fixed")]
Panel <- merge(Panel, Temp, by = "kenteken", all.x = TRUE, all.y = FALSE)
day(Panel$recall_new) <- 1
day(Panel$recall_fixed) <- 1

# rec_new: 1 only in the month the recall was issued
Panel$rec_new <- as.numeric(
  !is.na(Panel$recall_new) & Panel$recall_new == Panel$date
)
# rec_new_t: 1 from the recall month onwards
Panel$rec_new_t <- as.numeric(
  !is.na(Panel$recall_new) & Panel$recall_new <= Panel$date
)

# Months elapsed since the recall, counted so that the recall month itself is 1.
# Vehicles without a recall get 0 in both variables.
Panel$rec_new_dist <- with(
  Panel,
  1 + (year(date) * 12 + month(date)) - (year(recall_new) * 12 + month(recall_new))
)
Panel$rec_new_dist[is.na(Panel$rec_new_dist)] <- 0
# rec_new_dist_both keeps the negative values (months before the recall) ...
Panel$rec_new_dist_both <- Panel$rec_new_dist
# ... while rec_new_dist is floored at 0
Panel$rec_new_dist <- pmax(Panel$rec_new_dist, 0)

# Bucket the distance: 0 = none/before, 1 = months 1-3, 2 = 4-6, 3 = 7-9,
# 4 = 10-12, 5 = 13 or more
Panel$rec_new_dist_cat <- as.numeric(
  findInterval(Panel$rec_new_dist, c(1, 4, 7, 10, 13))
)
table(Panel$rec_new_dist_cat)

# rec_fixed: 1 only in the month the recall was fixed
Panel$rec_fixed <- as.numeric(
  !is.na(Panel$recall_fixed) & Panel$recall_fixed == Panel$date
)
# rec_fixed_t: 1 from the fixing month onwards
Panel$rec_fixed_t <- as.numeric(
  !is.na(Panel$recall_fixed) & Panel$recall_fixed <= Panel$date
)

# Identifier for each type-approval / variant / version combination.
# paste() turns missing components into the text "NA", so vehicles without a
# type approval still receive an id.
Panel$id_ver <- as.factor(
  paste(Panel$typegoedkeuringsnummer, Panel$variant, Panel$uitvoering, sep = "_")
)

# Sort by version id, then vehicle, then month. The column is referenced by its
# full name: the previous `Panel$id` only worked through `$` partial matching
# and would silently break if another column starting with "id" were added.
Panel <- Panel[order(Panel$id_ver, Panel$kenteken, Panel$date), ]

# Converted only after sorting, so the sort order above is unchanged
Panel$kenteken <- as.character(Panel$kenteken)

# Drop panel months that precede the car's first registration in NL.
# Rows where the comparison is NA (no registration date) are dropped as well;
# the table shows how many rows fall in each case.
i <- Panel$datumeersteafgiftenederland > Panel$date
table(i, useNA = "always")
Panel <- Panel[which(!i), ]
rm(i)

# Age of the car in completed years, measured from first admission
Panel$age_car <- with(
  Panel,
  ((month(date) + year(date) * 12) -
     (month(datumeerstetoelating) + year(datumeerstetoelating) * 12)) %/% 12
)
# Time in NL in completed years, measured from first registration in NL
Panel$time_NL <- with(
  Panel,
  ((month(date) + year(date) * 12) -
     (month(datumeersteafgiftenederland) + year(datumeersteafgiftenederland) * 12)) %/% 12
)


# difflastsale: number of months between the panel month and the most recent
# ownership event, i.e. the first registration in NL or the latest resale that
# happened strictly before the panel month.

# One row per vehicle: plate, first NL registration date, and one column per
# resale date (dd/mm/yyyy) found in datumtenaamstelling_all
Temp <- cbind(
  Data$kenteken,
  Data$datumeersteafgiftenederland,
  as.data.frame(
    str_extract_all(
      Data$datumtenaamstelling_all,
      "[[:digit:]]{2}/[[:digit:]]{2}/[[:digit:]]{4}",
      simplify = TRUE
    )
  )
)
# Older R versions turn strings into factors here; convert them back
is_fct <- vapply(Temp, is.factor, logical(1))
Temp[is_fct] <- lapply(Temp[is_fct], as.character)

# Convert to Date. Columns 3 onwards are the resale dates; the index vector is
# empty when no resale date was extracted (3:ncol would count backwards and
# fail in that case).
Temp[, 2] <- as.Date(Temp[, 2], format = "%Y-%m-%d")
resale_cols <- seq_len(ncol(Temp))[-(1:2)]
for (j in resale_cols) {
  Temp[, j] <- as.Date(Temp[, j], format = "%d/%m/%Y")
}

# Dates are compared through a month index, year * 12 + month. Start from the
# first registration in NL: index of that date minus index of the panel month.
Temp1 <- data.frame(
  kenteken = Temp[, 1],
  lastsale = month(Temp[, 2]) + year(Temp[, 2]) * 12,
  stringsAsFactors = FALSE
)
Panel <- merge(Panel, Temp1, by = "kenteken", all.x = TRUE, all.y = FALSE)
Panel$difflastsale <- Panel$lastsale - (month(Panel$date) + year(Panel$date) * 12)
Panel$lastsale <- NULL
rm(Temp1)

# Repeat for every resale date and keep the difference closest to zero among
# the strictly negative ones (i.e. the most recent earlier resale)
for (j in resale_cols) {
  Temp1 <- data.frame(
    kenteken = Temp[, 1],
    lastsale = month(Temp[, j]) + year(Temp[, j]) * 12,
    stringsAsFactors = FALSE
  )
  Panel <- merge(Panel, Temp1, by = "kenteken", all.x = TRUE, all.y = FALSE)
  Panel$temp <- Panel$lastsale - (month(Panel$date) + year(Panel$date) * 12)
  closer <- !is.na(Panel$temp) & Panel$temp < 0 & Panel$temp > Panel$difflastsale
  Panel$difflastsale[closer] <- Panel$temp[closer]
  Panel$lastsale <- NULL
  rm(Temp1, closer)
}
rm(is_fct, resale_cols)

# Flip the sign so the variable counts months elapsed since the event
Panel$difflastsale <- -Panel$difflastsale
Panel$temp <- NULL

# Note: this panel still contains very old vehicles (typically first registered
# before 2000) that have no type approval
save(Panel, file = "Panel_Full.RData")

# Release the source data and the last helper table
rm(list = c("Data", "Temp"))
gc()

